import graphlab
# Load the song listening data from the local 'song_data.gl/' directory into an SFrame.
song_data = graphlab.SFrame('song_data.gl/')
This non-commercial license of GraphLab Create for academic use is assigned to ssq6554@gmail.com and will expire on October 10, 2017.
[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: C:\Users\Beginner\AppData\Local\Temp\graphlab_server_1477729626.log.0
# Preview the first rows of the dataset.
song_data.head()
| user_id | song_id | listen_count | title | artist |
|---|---|---|---|---|
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... | SOAKIMP12A8C130995 | 1 | The Cove | Jack Johnson |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... | SOBBMDR12A8C13253B | 2 | Entre Dos Aguas | Paco De Lucia |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... | SOBXHDL12A81C204C0 | 1 | Stronger | Kanye West |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... | SOBYHAJ12A6701BF1D | 1 | Constellations | Jack Johnson |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... | SODACBL12A8C13C273 | 1 | Learn To Fly | Foo Fighters |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... | SODDNQT12A6D4F5F7E | 5 | Apuesta Por El Rock 'N' Roll ... | Héroes del Silencio |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... | SODXRTY12AB0180F3B | 1 | Paper Gangsta | Lady GaGa |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... | SOFGUAY12AB017B0A8 | 1 | Stacked Actors | Foo Fighters |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... | SOFRQTD12A81C233C0 | 1 | Sehr kosmisch | Harmonia |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... | SOHQWYZ12A6D4FA701 | 1 | Heaven's gonna burn your eyes ... | Thievery Corporation feat. Emiliana Torrini ... |
| song |
|---|
| The Cove - Jack Johnson |
| Entre Dos Aguas - Paco De Lucia ... |
| Stronger - Kanye West |
| Constellations - Jack Johnson ... |
| Learn To Fly - Foo Fighters ... |
| Apuesta Por El Rock 'N' Roll - Héroes del ... |
| Paper Gangsta - Lady GaGa |
| Stacked Actors - Foo Fighters ... |
| Sehr kosmisch - Harmonia |
| Heaven's gonna burn your eyes - Thievery ... |
# Point GraphLab Canvas at the 'ipynb' target (presumably inline notebook rendering -- TODO confirm).
graphlab.canvas.set_target('ipynb')
# Visualize the 'song' column.
song_data['song'].show()
# Number of rows in the dataset.
len(song_data)
1116609
# Collect the distinct values of the 'user_id' column, then count them.
users = song_data['user_id'].unique()
len(users)
66346
# Randomly split the rows into training and test sets using a 0.8 fraction;
# the fixed seed keeps the split reproducible between runs.
train_data, test_data = song_data.random_split(0.8, seed=0)

# Fit a popularity recommender on the training rows, with 'user_id' as the
# user column and 'song' as the item column.
popularity_model = graphlab.popularity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song')
Recsys training: model = popularity
Warning: Ignoring columns song_id, listen_count, title, artist;
To use one of these as a target column, set target = <column_name>
and use a method that allows the use of a target.
Preparing data set.
Data has 893580 observations with 66085 users and 9952 items.
Data prepared in: 1.50062s
893580 observations to process; with 9952 unique items.
# Recommendations from the popularity model for the first entry in `users`.
popularity_model.recommend(users=[users[0]])
| user_id | song | score | rank |
|---|---|---|---|
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Sehr kosmisch - Harmonia | 4754.0 | 1 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Undo - Björk | 4227.0 | 2 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | You're The One - Dwight Yoakam ... | 3781.0 | 3 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Dog Days Are Over (Radio Edit) - Florence + The ... | 3633.0 | 4 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Revelry - Kings Of Leon | 3527.0 | 5 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Horn Concerto No. 4 in E flat K495: II. Romance ... | 3161.0 | 6 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Secrets - OneRepublic | 3148.0 | 7 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Fireflies - Charttraxx Karaoke ... | 2532.0 | 8 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Tive Sim - Cartola | 2521.0 | 9 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Drop The World - Lil Wayne / Eminem ... | 2053.0 | 10 |
# Recommendations from the popularity model for the second entry in `users`.
popularity_model.recommend(users=[users[1]])
| user_id | song | score | rank |
|---|---|---|---|
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Sehr kosmisch - Harmonia | 4754.0 | 1 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Undo - Björk | 4227.0 | 2 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | You're The One - Dwight Yoakam ... | 3781.0 | 3 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Dog Days Are Over (Radio Edit) - Florence + The ... | 3633.0 | 4 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Revelry - Kings Of Leon | 3527.0 | 5 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Horn Concerto No. 4 in E flat K495: II. Romance ... | 3161.0 | 6 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Secrets - OneRepublic | 3148.0 | 7 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Hey_ Soul Sister - Train | 2538.0 | 8 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Fireflies - Charttraxx Karaoke ... | 2532.0 | 9 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Tive Sim - Cartola | 2521.0 | 10 |
# Same popularity-model query, passing the first two entries of `users` in a single call.
popularity_model.recommend(users=[users[0],users[1]])
| user_id | song | score | rank |
|---|---|---|---|
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Sehr kosmisch - Harmonia | 4754.0 | 1 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Undo - Björk | 4227.0 | 2 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | You're The One - Dwight Yoakam ... | 3781.0 | 3 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Dog Days Are Over (Radio Edit) - Florence + The ... | 3633.0 | 4 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Revelry - Kings Of Leon | 3527.0 | 5 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Horn Concerto No. 4 in E flat K495: II. Romance ... | 3161.0 | 6 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Secrets - OneRepublic | 3148.0 | 7 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Fireflies - Charttraxx Karaoke ... | 2532.0 | 8 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Tive Sim - Cartola | 2521.0 | 9 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Drop The World - Lil Wayne / Eminem ... | 2053.0 | 10 |
# Fit an item-similarity recommender on the same training rows, again with
# 'user_id' as the user column and 'song' as the item column.
personalized_model = graphlab.item_similarity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song')
Recsys training: model = item_similarity
Warning: Ignoring columns song_id, listen_count, title, artist;
To use one of these as a target column, set target = <column_name>
and use a method that allows the use of a target.
Preparing data set.
Data has 893580 observations with 66085 users and 9952 items.
Data prepared in: 1.34803s
Training model from provided data.
Gathering per-item and per-user statistics.
+--------------------------------+------------+
| Elapsed Time (Item Statistics) | % Complete |
+--------------------------------+------------+
| 0us | 4.5 |
| 31.2ms | 100 |
+--------------------------------+------------+
Setting up lookup tables.
Processing data in one pass using dense lookup tables.
+-------------------------------------+------------------+-----------------+
| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |
+-------------------------------------+------------------+-----------------+
| 211.607ms | 0 | 0 |
| 1.82s | 100 | 9952 |
+-------------------------------------+------------------+-----------------+
Finalizing lookup tables.
Generating candidate set for working with new users.
Finished training in 1.91543s
# Recommendations from the item-similarity model for the first entry in `users`.
personalized_model.recommend(users=[users[0]])
| user_id | song | score | rank |
|---|---|---|---|
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Cuando Pase El Temblor - Soda Stereo ... | 0.0194504536115 | 1 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Fireflies - Charttraxx Karaoke ... | 0.0144737317012 | 2 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Love Is A Losing Game - Amy Winehouse ... | 0.0142865960415 | 3 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Marry Me - Train | 0.014133471709 | 4 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Secrets - OneRepublic | 0.013591665488 | 5 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Sehr kosmisch - Harmonia | 0.0133987894425 | 6 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Te Hacen Falta Vitaminas - Soda Stereo ... | 0.0129302831796 | 7 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | OMG - Usher featuring will.i.am ... | 0.0127778282532 | 8 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | Y solo se me ocurre amarte (Unplugged) - ... | 0.0123411279458 | 9 |
| c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ... | No Dejes Que... - Caifanes ... | 0.0121042499175 | 10 |
# Recommendations from the item-similarity model for the second entry in `users`.
personalized_model.recommend(users=[users[1]])
| user_id | song | score | rank |
|---|---|---|---|
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Riot In Cell Block Number Nine - Dr Feelgood ... | 0.0374999940395 | 1 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Sei Lá Mangueira - Elizeth Cardoso ... | 0.0331632643938 | 2 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | The Stallion - Ween | 0.0322580635548 | 3 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Rain - Subhumans | 0.0314159244299 | 4 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | West One (Shine On Me) - The Ruts ... | 0.0306771993637 | 5 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Back Against The Wall - Cage The Elephant ... | 0.0301204770803 | 6 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Life Less Frightening - Rise Against ... | 0.0284431129694 | 7 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | A Beggar On A Beach Of Gold - Mike And The ... | 0.0230024904013 | 8 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Audience Of One - Rise Against ... | 0.0193938463926 | 9 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | Blame It On The Boogie - The Jacksons ... | 0.0189873427153 | 10 |
# Recommendations from the item-similarity model for the third entry in `users`.
personalized_model.recommend(users=[users[2]])
| user_id | song | score | rank |
|---|---|---|---|
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... | Grind With Me (Explicit Version) - Pretty Ricky ... | 0.0459424376488 | 1 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... | There Goes My Baby - Usher ... | 0.0331920742989 | 2 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... | Panty Droppa [Intro] (Album Version) - Trey ... | 0.0318566203117 | 3 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... | Nobody (Featuring Athena Cage) (LP Version) - ... | 0.0278467655182 | 4 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... | Youth Against Fascism - Sonic Youth ... | 0.0262914180756 | 5 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... | Nice & Slow - Usher | 0.0239639401436 | 6 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... | Making Love (Into The Night) - Usher ... | 0.0238176941872 | 7 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... | Naked - Marques Houston | 0.0228925704956 | 8 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... | I.nner Indulgence - DESTRUCTION ... | 0.0220767498016 | 9 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... | Love Lost (Album Version) - Trey Songz ... | 0.0204497694969 | 10 |
# Ask the item-similarity model for the songs it scores as most similar to this one.
personalized_model.get_similar_items(['With Or Without You - U2'])
| song | similar | score | rank |
|---|---|---|---|
| With Or Without You - U2 | I Still Haven't Found What I'm Looking For ... | 0.042857170105 | 1 |
| With Or Without You - U2 | Hold Me_ Thrill Me_ Kiss Me_ Kill Me - U2 ... | 0.0337349176407 | 2 |
| With Or Without You - U2 | Window In The Skies - U2 | 0.0328358411789 | 3 |
| With Or Without You - U2 | Vertigo - U2 | 0.0300751924515 | 4 |
| With Or Without You - U2 | Sunday Bloody Sunday - U2 | 0.0271317958832 | 5 |
| With Or Without You - U2 | Bad - U2 | 0.0251798629761 | 6 |
| With Or Without You - U2 | A Day Without Me - U2 | 0.0237154364586 | 7 |
| With Or Without You - U2 | Another Time Another Place - U2 ... | 0.0203251838684 | 8 |
| With Or Without You - U2 | Walk On - U2 | 0.0202020406723 | 9 |
| With Or Without You - U2 | Get On Your Boots - U2 | 0.0196850299835 | 10 |
# Ask the item-similarity model for the songs it scores as most similar to this one.
personalized_model.get_similar_items(['Chan Chan (Live) - Buena Vista Social Club'])
| song | similar | score | rank |
|---|---|---|---|
| Chan Chan (Live) - Buena Vista Social Club ... | Murmullo - Buena Vista Social Club ... | 0.188118815422 | 1 |
| Chan Chan (Live) - Buena Vista Social Club ... | La Bayamesa - Buena Vista Social Club ... | 0.18719214201 | 2 |
| Chan Chan (Live) - Buena Vista Social Club ... | Amor de Loca Juventud - Buena Vista Social Club ... | 0.184834122658 | 3 |
| Chan Chan (Live) - Buena Vista Social Club ... | Diferente - Gotan Project | 0.0214592218399 | 4 |
| Chan Chan (Live) - Buena Vista Social Club ... | Mistica - Orishas | 0.0205761194229 | 5 |
| Chan Chan (Live) - Buena Vista Social Club ... | Hotel California - Gipsy Kings ... | 0.0193049907684 | 6 |
| Chan Chan (Live) - Buena Vista Social Club ... | Nacà Orishas - Orishas | 0.0191571116447 | 7 |
| Chan Chan (Live) - Buena Vista Social Club ... | Gitana - Willie Colon | 0.018796980381 | 8 |
| Chan Chan (Live) - Buena Vista Social Club ... | Le Moulin - Yann Tiersen | 0.018796980381 | 9 |
| Chan Chan (Live) - Buena Vista Social Club ... | Criminal - Gotan Project | 0.0187793374062 | 10 |
# Compare both recommenders on the test rows. user_sample=0.05 limits the
# evaluation to a sample of users (the run log reports 2931 users used).
model_performance = graphlab.compare(
    test_data,
    [popularity_model, personalized_model],
    user_sample=0.05)

# Display the comparison results for the same two models.
graphlab.show_comparison(
    model_performance,
    [popularity_model, personalized_model])
compare_models: using 2931 users to estimate model performance
PROGRESS: Evaluate model M0
recommendations finished on 1000/2931 queries. users per second: 4492.3
recommendations finished on 2000/2931 queries. users per second: 6131.06
Precision and recall summary statistics by cutoff
| cutoff | mean_precision | mean_recall |
|---|---|---|
| 1 | 0.0245649948823 | 0.00612161730381 |
| 2 | 0.0228590924599 | 0.011369683427 |
| 3 | 0.0213806436938 | 0.0162475749733 |
| 4 | 0.0199590583419 | 0.0203157375624 |
| 5 | 0.018219037871 | 0.0240192850479 |
| 6 | 0.0172864778801 | 0.0271066559961 |
| 7 | 0.0164254033241 | 0.0300735637993 |
| 8 | 0.0159501876493 | 0.0334411087226 |
| 9 | 0.0154289396869 | 0.0357355720969 |
| 10 | 0.0149095871716 | 0.0382195347395 |
[10 rows x 3 columns]
PROGRESS: Evaluate model M1
recommendations finished on 1000/2931 queries. users per second: 7429.31
recommendations finished on 2000/2931 queries. users per second: 8136.46
Precision and recall summary statistics by cutoff
| cutoff | mean_precision | mean_recall |
|---|---|---|
| 1 | 0.183555100648 | 0.0608062953764 |
| 2 | 0.155237120437 | 0.0961782441184 |
| 3 | 0.136131013306 | 0.12197044303 |
| 4 | 0.121801432958 | 0.141331643072 |
| 5 | 0.109996588195 | 0.157212564531 |
| 6 | 0.101956101444 | 0.173869876096 |
| 7 | 0.0950918750305 | 0.18783266044 |
| 8 | 0.0894745820539 | 0.202043918305 |
| 9 | 0.0846885780356 | 0.214101382052 |
| 10 | 0.0799044694643 | 0.223207688574 |
[10 rows x 3 columns]
Model compare metric: precision_recall